
rm(list=ls())
set.seed(21301)
setwd("D:/Dropbox/jesarey_documents/Measuring Publication Bias/pub-bias-replication-files")

library(foreign)
library(ggplot2)

my.data.set<-read.dta(file="publication_data_2016-5-29.dta")
data.full<-my.data.set[which(my.data.set$number_of_dv==1),]

# count articles before pruning
dim(data.full) # how big data set?
sum(data.full$journal=="APSR")
sum(data.full$journal=="AJPS")

# continuous dependent variables only, first DV
data<-my.data.set[ which(my.data.set$model==1 & my.data.set$number_of_dv==1),]

# removing observations with missing values
est.miss<-is.na(data$estimate)
se.miss<-is.na(data$se)
size.miss<-is.na(data$sample_size)
data$miss<-est.miss+se.miss+size.miss
# if any num_ivs are missing, set them to 2 (the smallest reasonable choice)
data$num_ivs[which(is.na(data$num_ivs))] <- 2

dat<-subset(data, subset=data$miss<1, select=c("journal", "title", "estimate", "se", "sample_size", "tails", "num_ivs", "doi"))
dim(dat) # how big data set?
sum(dat$journal=="APSR")
sum(dat$journal=="AJPS")


sig.cut<- abs(qt(0.025, df= (dat$sample_size - dat$num_ivs - 1)))
dat$notsig<-ifelse(abs(dat$estimate/dat$se)<sig.cut,1,0)

sum(dat$notsig) # how many non-significant results published? = 25
sum(dat$notsig)/length(dat$notsig) # what % of non-significant results published? = 0.1497006


# exclude the non-significant results and observations
dat<-subset(dat, subset=(dat$notsig==0))
t.dat<-dat$estimate/dat$se
n.dat<-dat$sample_size

dim(dat) # how big data set?

attach(dat)

# a sub-function to generate a large set of draws from the prior
# for beta, simulate sampling variation using the estimated se for
# beta, and then calculate publication bias

# for spike-and-slab distributed beta

gen.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed, alpha){
  
  if(seed!="off"){
    set.seed(seed)
  }
  b.true <- runif(n.draws, min=(-1)*bound, max=bound)
  b.false <- rep(0, n.draws)
  choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
  b.test <- ifelse(choice==1, b.true, b.false)
  
  b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
  
  sig <- ifelse( abs(b.test.est/se.b) >= abs(qt((alpha/2), df=df.b)), 1, 0 )
  
  list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
  
  return(list.out)
  
}



bias.calc <- function(b, se.b, df.b, pr.false, bound = 3 * abs(b), n.draws = 100000, seed=123456, alpha=0.05 ){
  
  list.store <- gen.samp(se.b, df.b, pr.false, n.draws, bound, seed, alpha)
  sig <- which(list.store$b.sig==1)
  bias <- ( list.store$b.est[sig] - list.store$b.true[sig] )
  mult <- sign(list.store$b.true[sig])
  mult <- ifelse(mult==0, sign(bias), mult)
  #return( mean(mult*bias)/b * sign(b) )
  return( mean(mult*bias)/abs(b) )
}


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
  
}
mean(b.bias)                                   # 0.298437             
length(which(b.bias>=0.1))/length(b.bias)      # 0.8732394

dat$b.bias.plot <- 100*(b.bias)

# make category of p-value
dat$t <- abs( dat$estimate / dat$se )
dat$p <- 2*(1 - pt(q=dat$t, df=(dat$sample_size-dat$num_ivs-1)))
dat$p.cat <- cut_interval(dat$p, length=0.01)
dat$bias.cat <- cut_interval(dat$b.bias.plot, length=10)

# some helpful code was taken from
# http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
# and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
# and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 29.8%", y = 0.265, x=3.5, size = 3) + ggtitle("baseline 10% chance of non-zero relationship")

p
ggsave(filename="bias-hist.pdf", plot=p, device="pdf", width=6, height=5)



b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
  
}
mean(b.bias)                                    # 0.188862
length(which(b.bias>=0.1))/length(b.bias)       # 0.7816901



b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
  
}
mean(b.bias)                                     # 0.08906531
length(which(b.bias>=0.1))/length(b.bias)        # 0.415493




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
  
}
mean(b.bias)                                     # 0.04733124
length(which(b.bias>=0.1))/length(b.bias)        # 0.1408451


dat$b.bias.plot <- 100*(b.bias)

# make category of p-value
dat$t <- abs( dat$estimate / dat$se )
dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
dat$p.cat <- cut_interval(dat$p, length=0.01)
dat$bias.cat <- cut_interval(dat$b.bias.plot, length=2)

# some helpful code was taken from
# http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
# and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
# and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 4.73%", y = 0.265, x=4, size = 3) + ggtitle("baseline 100% chance of non-zero relationship")

p
ggsave(filename="bias-hist-nospike.pdf", plot=p, device="pdf", width=6, height=5)




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, bound = 10*abs(dat$estimate[i]))
  
}
mean(b.bias)                                                    # 0.2438803
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0.5070423





# what about a lower threshold for significance?

b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, alpha=0.01)
  
}
mean(b.bias)                                   # 0.1556354             
length(which(b.bias>=0.1))/length(b.bias)      # 0.6690141




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0, alpha=0.01)
  
}
mean(b.bias)                                     # 0.0559474
length(which(b.bias>=0.1))/length(b.bias)        # 0.2042254












# now repeat the analysis above for a normal prior

gen.norm.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed, alpha){
  
  if(seed!="off"){
    set.seed(seed)
  }
  b.true <- rnorm(n.draws, mean=0, sd=bound)
  b.false <- rep(0, n.draws)
  choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
  b.test <- ifelse(choice==1, b.true, b.false)
  
  b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
  
  sig <- ifelse( abs(b.test.est/se.b) >= abs(qt((alpha/2), df=df.b)), 1, 0 )
  list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
  return(list.out)
  
}



bias.calc.norm <- function(b, se.b, df.b, pr.false, bound = abs(b), n.draws = 100000, seed=123456, alpha=0.05 ){
  
  list.store <- gen.norm.samp(se.b, df.b, pr.false, n.draws, bound, seed, alpha)
  sig <- which(list.store$b.sig==1)
  bias <- ( list.store$b.est[sig] - list.store$b.true[sig] )
  mult <- sign(list.store$b.true[sig])
  mult <- ifelse(mult==0, sign(bias), mult)
  return( mean(mult*bias)/b * sign(b) )
  
}


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
  
}
mean(b.bias)                                                         # 0.4046605
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.528169

dat$b.bias.plot <- 100*(b.bias)

# make category of p-value
dat$t <- abs( dat$estimate / dat$se )
dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
dat$p.cat <- cut_interval(dat$p, length=0.01)
dat$bias.cat <- cut_interval(dat$b.bias.plot, length=15)

# some helpful code was taken from
# http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
# and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
# and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 40.5%", y = 0.315, x=4, size = 3) + ggtitle("baseline 10% chance of non-zero relationship")

p
ggsave(filename="bias-hist-norm.pdf", plot=p, device="pdf", width=6, height=5)




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
  
}
mean(b.bias)                                                         # 0.2924327
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.4788732


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
  
}
mean(b.bias)                                                         # 0.177636
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.443662




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
  
}
mean(b.bias)                                                         # 0.1237288
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.4225352

dat$b.bias.plot <- 100*(b.bias)

# make category of p-value
dat$t <- abs( dat$estimate / dat$se )
dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
dat$p.cat <- cut_interval(dat$p, length=0.01)
dat$bias.cat <- cut_interval(dat$b.bias.plot, length=5)

# some helpful code was taken from
# http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
# and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
# and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 12.4%", y = 0.275, x=3.5, size = 3) + ggtitle("baseline 100% chance of non-zero relationship")

p
ggsave(filename="bias-hist-norm-nospike.pdf", plot=p, device="pdf", width=6, height=5)


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), bound = 10*abs(dat$estimate[i]) , p=0.9)
  
}
mean(b.bias)                                                         # 0.2415869
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.5070423


# what about a lower threshold for significance?

b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, alpha=0.01)
  
}
mean(b.bias)                                                         # 0.2959394
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.471831



b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0, alpha=0.01)
  
}
mean(b.bias)                                                         # 0.1491986
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.4366197





# calculate Pr(null | significant) using Bayes' Rule:
#
#
#                                    Pr(significant | null) * Pr(null) 
# Pr(null | significant) = ----------------------------------------------------------------
#                           Pr (sig | null) * Pr(null) + Pr(sig | not null) * Pr(not null)
# 
# set Pr(sig | not null) = 1 to establish a lower bound
# set Pr(not null) = 1 - Pr(null) by identity

calc.null.sig <- function(pr.sig.given.null, pr.null){
  
  (pr.sig.given.null * pr.null) / ( ( pr.sig.given.null * pr.null ) + (1 - pr.null) )
  
}

pr.null.plot <- seq(from=0.01, to=0.99, by=0.01)
null.sig.plot <- calc.null.sig(pr.sig.given.null=0.05, pr.null.plot)

pr.sig.null.plot <- seq(from=0.001, to=0.05, by=0.001)
null.sig.plot <- calc.null.sig(pr.sig.null.plot, pr.null=0.95)
null.sig.plot.2 <- calc.null.sig(pr.sig.null.plot, pr.null=0.9)
null.sig.plot.3 <- calc.null.sig(pr.sig.null.plot, pr.null=0.75)
null.sig.plot.4 <- calc.null.sig(pr.sig.null.plot, pr.null=0.5)

dat$t <- abs( dat$estimate / dat$se )
dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))

plotdat <- data.frame(sig.null = rep(pr.sig.null.plot,4), null.sig = c(null.sig.plot, null.sig.plot.2, null.sig.plot.3, null.sig.plot.4), pr.null = as.factor(rep(c(0.95, 0.9, 0.75, 0.5), each=length(pr.sig.null.plot))) )

# used some helpful code from
# http://stackoverflow.com/questions/16712800/overlay-lines-and-hist-with-ggplot2

p <- ggplot(plotdat) + geom_line(aes(x=sig.null, y=null.sig, linetype=pr.null)) + theme_bw() + labs(x = "Pr(statistically significant | null)", y = "Lines: Pr(null | statistically significant) \n Histogram: Proportion of Published p-values") + guides(linetype = guide_legend("Prior Pr(null)")) + scale_y_continuous(breaks=seq(from=0, to=0.6, by=0.05))  + scale_x_continuous(breaks=seq(from=0, to=0.05, by=0.01)) + geom_rug(col=rgb(.5,0,0,alpha=.2)) + geom_histogram(data=dat, aes(x=p, y = ..count../sum(..count..)), alpha=0.2, binwidth=0.005)
p
ggsave(filename="false-positives.pdf", plot=p, device="pdf", width=6, height=5)


# calculate mean lower bound Pr(null)
null.prob <- calc.null.sig(pr.sig.given.null=dat$p, 0.75)
length(which(null.prob >= 0.10)) / length(null.prob)        # 0.1056338
null.prob <- calc.null.sig(pr.sig.given.null=dat$p, 0.9)
length(which(null.prob >= 0.10)) / length(null.prob)        # 0.2535211
null.prob <- calc.null.sig(pr.sig.given.null=dat$p, 0.95)
length(which(null.prob >= 0.10)) / length(null.prob)        # 0.4084507










####################################
# Appendix calculations 
####################################

# a sub-function to generate a large set of draws from the prior
# for beta, simulate sampling variation using the estimated se for
# beta, and then indicate true and false positives

# for spike-and-slab distributed beta

gen.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed){
  
  if(seed!="off"){
    set.seed(seed)
  }
  b.true <- runif(n.draws, min=(-1)*bound, max=bound)
  b.false <- rep(0, n.draws)
  choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
  b.test <- ifelse(choice==1, b.true, b.false)
  
  b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
  
  sig <- ifelse( abs(b.test.est/se.b) >= abs(qt(0.025, df=df.b)), 1, 0 )
  
  list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
  
  return(list.out)
  
}



bias.calc <- function(b, se.b, df.b, pr.false, bound = 3 * abs(b), n.draws = 100000, seed=123456 ){
  
  list.store <- gen.samp(se.b, df.b, pr.false, n.draws, bound, seed)
  sig <- which(list.store$b.sig==1)
  bias <- list.store$b.est[sig] - list.store$b.true[sig]
  bias.mod <- loess(bias ~ list.store$b.est[sig], control=loess.control(trace.hat = "approximate"), span=0.2)
  out <-predict(bias.mod, newdata = b)
  return(out)
  
}


some.draws <- gen.samp(se.b = 0.4, df = 100, pr.false = 0, n.draws = 30000, bound = 3, seed = 123456)
sig <- which(some.draws$b.sig==1)
plot.data = data.frame(b.est = some.draws$b.est[sig], b.true = some.draws$b.true[sig])
b.est.given.b.true <- predict( loess(b.est ~ b.true, data=plot.data, span=0.2), data.frame(b.true = seq(-3,3,0.01)) )
plot.data.1 <- data.frame(b.est = b.est.given.b.true, b.true = seq(-3,3,0.01), type = rep(1, length(seq(-3,3,0.01))))
b.true.given.b.est <- predict( loess(b.true ~ b.est, data=plot.data, span=0.2), data.frame(b.est = seq(-3,3,0.01)) )
plot.data.2 <- data.frame(b.est = seq(-3,3,0.01), b.true = b.true.given.b.est, type = rep(2, length(seq(-3,3,0.01))))
plot.data.3 <- rbind(plot.data.1, plot.data.2)
plot.data.3$type <- factor(plot.data.3$type, labels=c("E[beta estimate | true beta]", "E[true beta | beta estimate]"))

p <- ggplot(plot.data) + geom_point(aes(x=b.true, y=b.est), col="darkgray") + geom_line(data = plot.data.3, aes(x=b.true, y=b.est, linetype=type)) + scale_x_continuous(limits=c(-2,2)) + scale_y_continuous(limits=c(-2,2)) + theme(legend.position = "top") + scale_linetype("") + labs(x = "true beta", y = "beta estimate")
suppressWarnings(print(p))
suppressWarnings(ggsave(filename="different-biases.pdf", plot=p, device="pdf", width=6, height=6))




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
  
}
mean(b.bias)                  
mean(b.bias/dat$estimate)                                     # 0.3798161
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)      # 0.6478873

b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                      # 0.2774034
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)       # 0.584507



b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                       # 0.1221786
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0.4507042



b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                       # -0.0006579831
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0






b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, bound = 10*abs(dat$estimate[i]))
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                       # 0.5318233
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0.7605634








# now repeat the analysis above for a normal prior

gen.norm.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed){
  
  if(seed!="off"){
    set.seed(seed)
  }
  b.true <- rnorm(n.draws, mean=0, sd=bound)
  b.false <- rep(0, n.draws)
  choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
  b.test <- ifelse(choice==1, b.true, b.false)
  
  b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
  
  sig <- ifelse( abs(b.test.est/se.b) >= abs(qt(0.025, df=df.b)), 1, 0 )
  
  list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
  
  return(list.out)
  
}



bias.calc.norm <- function(b, se.b, df.b, pr.false, bound = abs(b), n.draws = 100000, seed=123456 ){
  
  list.store <- gen.norm.samp(se.b, df.b, pr.false, n.draws, bound, seed)
  sig <- which(list.store$b.sig==1)
  bias <- list.store$b.est[sig] - list.store$b.true[sig]
  bias.mod <- loess(bias ~ list.store$b.est[sig], control=loess.control(trace.hat = "approximate"), span=0.2)
  out <-predict(bias.mod, newdata = b)
  return(out)
  
}


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                            # 0.3798531
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.6690141


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                            # 0.2895558
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.6126761


b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                            # 0.1711088
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.556338




b.bias<-c()
for(i in 1:dim(dat)[1]){
  
  b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
  
}
mean(b.bias)
mean(b.bias/dat$estimate)                                            # 0.09461296
length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.471831


detach(dat)

